from bs4 import BeautifulSoup
import pandas as pd

# Inline sample HTML document kept as a small fixture for quick experiments.
# None of the active statements visible in this script parse it; the soup used
# further down is built from a file on disk instead.
html = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title" name="dromouse"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1"><!-- Elsie --></a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""

# Alternative for quick experiments: parse the inline sample instead of the file.
# soup = BeautifulSoup(html)

# Parse the saved page from disk. The context manager guarantees the file
# handle is closed as soon as BeautifulSoup has consumed it, instead of
# leaving it open for the lifetime of the process.
# NOTE(review): no parser is named, so bs4 picks the "best" one installed and
# emits a warning; pin one explicitly (e.g. 'html.parser') once it is confirmed
# which parser this page was validated against.
with open('测试的dom文档.html', 'r', encoding='UTF-8') as dom_file:
    soup = BeautifulSoup(dom_file)

# Print the source URL of every <img> whose alt text is exactly "暂无图片".
# A descriptive name is used for the result so the builtin `list` is not
# shadowed by this step.
placeholder_images = soup.find_all('img', alt="暂无图片")
for image_tag in placeholder_images:
    print(image_tag.attrs['src'])


# obj.select('td')[0].select("img")[0].attrs['src']
# Column-oriented accumulator: one list per output column, appended to once per
# table row, then handed to pandas as-is.
jsonHtml = {'goodsName': [], 'goodsId': [], 'sales1': [], 'sales7': [], 'sales30': [], 'increment7': [],
            'groupBuyPrice': [], 'picUrl': []}

# The code below reads <td> cells at indices 0..6 of each row.
EXPECTED_CELL_COUNT = 7

for row in soup.find_all('tr'):
    # Look the cells up once per row rather than re-running the selector for
    # every column.
    cells = row.select('td')

    # Rows without the full set of <td> cells (e.g. a header row built from
    # <th>, or a spacer row) cannot be indexed below; skip them instead of
    # aborting the whole extraction with an IndexError.
    if len(cells) < EXPECTED_CELL_COUNT:
        continue

    # Cell 1 carries both the product name (second line of its text) and the
    # product id (the text between the "商品ID:" marker and the next newline).
    name_cell_text = cells[1].text
    jsonHtml['goodsName'].append(name_cell_text.split('\n')[1])
    jsonHtml['goodsId'].append(name_cell_text.split("商品ID:")[1].split('\n')[0])

    # Cells 2..6 are stored verbatim as text (no numeric conversion).
    jsonHtml['sales1'].append(cells[2].text)
    jsonHtml['sales7'].append(cells[3].text)
    jsonHtml['sales30'].append(cells[4].text)
    jsonHtml['increment7'].append(cells[5].text)
    jsonHtml['groupBuyPrice'].append(cells[6].text)

    # Cell 0 holds the product picture; keep the first <img>'s src.
    jsonHtml['picUrl'].append(cells[0].select("img")[0].attrs['src'])

df = pd.DataFrame(jsonHtml)
# Preview of the first rows. The value is only displayed in an interactive
# session or notebook; run as a plain script, the result is discarded.
df.head()

# print(soup.prettify())
